
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# DISCLAIMER AND GENERAL INFORMATION
#
# File: 00_data_prep.R
# Purpose: Data preparation
# Date: April 2025
# Data: "./data/prolific_data.csv" 
#        "./data/qualtrics_data.csv"
#
# Technical disclaimer:
# All analyses in R version 4.4.3 (2025-02-28 ucrt) -- "Trophy Case"
# R Studio 2024.12.1 Build 563 ("Kousa Dogwood" Release (27771613, 2025-02-02) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1255U 1.70 GHz with 16GB RAM
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


# Libraries
library("tidyverse")
library("plyr")
library("modelsummary")
library("ggpubr")
library("cowplot")
library("marginaleffects")
library("ggrepel")
library("sf")
library("car")
library("ggplot2")
library("modelsummary")
options(modelsummary_format_numeric_latex = "plain")
library("reshape2")
library("nnet")
library("mvtnorm")
library("ggstance")
library("tinytable")
library("collapse")

# Colour settings: colourblind-friendly palette (grey first), taken from
# http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
# Kept as an unnamed character vector so it can be indexed by position.
cols <- c(
  "#999999",  # grey
  "#E69F00",  # orange
  "#56B4E9",  # sky blue
  "#009E73",  # bluish green
  "#F0E442",  # yellow
  "#0072B2",  # blue
  "#D55E00",  # vermillion
  "#CC79A7"   # reddish purple
)



# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Shared cleaning helpers (used by sections A and B) ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Map raw answer codes to text labels.
#
# Arguments:
#   x       vector of raw answer codes; compared as character strings, so it
#           works whether the column was read as text or as numbers
#   map     named character vector: names are the codes, values the labels
#   default label given to every non-missing code that is not listed in `map`
#
# Returns a character vector of the same length as `x`; NA codes stay NA.
recode_codes <- function(x, map, default = NA_character_) {
  labels <- unname(map[match(as.character(x), names(map))])
  # Only codes that are present but unlisted receive the default
  labels[is.na(labels) & !is.na(x)] <- default
  labels
}

# Apply the variable cleaning that is common to both survey exports.
#
# Arguments:
#   dt      data frame holding one survey export, already restricted to the
#           respondents that should be kept
#   survey  label written to the new `survey` column ("Prolific"/"Qualtrics")
#
# Returns `dt` with renamed/recoded columns plus the new columns `low_income`
# and `survey` (appended in that order). Existing columns keep their position.
clean_survey_vars <- function(dt, survey) {

  # --- Rename export columns to analysis names -------------------------------
  # Columns that are not present in `dt` are skipped silently.
  new_names <- c(
    # DV: effective levels (climate change / trade / security)
    effect_ccpollevel_1    = "effect_cc_1st",
    effect_ccpollevel_2    = "effect_cc_2nd",
    effect_ccpollevel_3    = "effect_cc_3rd",
    effect_ccpollevel_4    = "effect_cc_4th",
    effect_econ_pollevel_1 = "effect_trade_1st",
    effect_econ_pollevel_2 = "effect_trade_2nd",
    effect_econ_pollevel_3 = "effect_trade_3rd",
    effect_econ_pollevel_4 = "effect_trade_4th",
    effect_security_1      = "effect_geopol_1st",
    effect_security_2      = "effect_geopol_2nd",
    effect_security_3      = "effect_geopol_3rd",
    effect_security_4      = "effect_geopol_4th",
    # Involvement variables
    UK_climateint_1        = "cc_invol",
    UK_tradeint_EU_1       = "trade_invol",
    UK_securityint_1       = "geopol_invol",
    # Posttreatment covariate, treatment and order variables
    X2019_election         = "voted",
    FL_39_DO               = "treatment",
    FL_52_DO               = "order_concern",
    FL_57_DO               = "order_ranking",
    FL_63_DO               = "order_outcome"
  )
  hit <- match(colnames(dt), names(new_names))
  colnames(dt)[!is.na(hit)] <- unname(new_names[hit[!is.na(hit)]])

  # --- Residence variables ---------------------------------------------------
  # Any non-missing residence code other than 1-3 is labelled Northern Ireland
  dt$residence <- recode_codes(
    dt$residence,
    c("1" = "England", "2" = "Wales", "3" = "Scotland"),
    default = "Northern Ireland"
  )

  dt$gor_england <- recode_codes(
    dt$gor_england,
    c("1" = "North East",
      "2" = "North West",
      "3" = "Yorkshire and Humber",
      "4" = "East Midlands",
      "5" = "West Midlands",
      "6" = "East of England",
      "7" = "London",
      "8" = "South East",
      "9" = "South West")
  )

  dt$town <- tolower(dt$town)

  # Code 1 is urban; every other non-missing code (2, 4, ...) counts as rural
  dt$urbanrural <- recode_codes(dt$urbanrural, c("1" = "urban"),
                                default = "rural")

  # --- Convert to numeric ----------------------------------------------------
  # Political left-right, concern items, DVs, involvement items and covariates
  numeric_vars <- c(
    "political_id",
    "cc_concern", "trade_concern", "geopol_concern",
    "effect_cc_1st", "effect_cc_2nd", "effect_cc_3rd", "effect_cc_4th",
    "effect_trade_1st", "effect_trade_2nd",
    "effect_trade_3rd", "effect_trade_4th",
    "effect_geopol_1st", "effect_geopol_2nd",
    "effect_geopol_3rd", "effect_geopol_4th",
    "cc_invol", "trade_invol", "geopol_invol",
    "coalaware", "industryaware",
    "worry_econdev", "worry_climate", "mitig_v_adapt",
    "age", "empl_sector", "education", "income", "parties"
  )
  dt[numeric_vars] <- lapply(dt[numeric_vars], as.numeric)

  # --- Concern variables -----------------------------------------------------
  # Reverse-code: 1 -> 4, 2 -> 3, 3 -> 2, 4 -> 1
  # (presumably 4-point items - confirm against the questionnaire)
  concern_vars <- c("cc_concern", "trade_concern", "geopol_concern")
  dt[concern_vars] <- lapply(dt[concern_vars], function(x) 5 - x)

  # --- Posttreatment covariates ----------------------------------------------
  # Code 5 is deliberately left out of the levels so that it becomes a true
  # missing value. Passing NA as a *label* (as an earlier version of this
  # script did) creates a factor level that is NA, for which is.na() returns
  # FALSE, so those respondents would not be dropped as missing.
  dt$citizen <- factor(as.character(dt$citizen),
                       levels = c("1", "2", "3", "4"),
                       labels = c("local", "devolved", "UK", "Europe"))

  # Answer codes that are set to missing, by variable
  missing_code <- c(worry_econdev = -99, worry_climate = -99,
                    mitig_v_adapt = -99, empl_sector = 22,
                    education = 7, income = 9)
  for (v in names(missing_code)) {
    dt[[v]][dt[[v]] %in% missing_code[[v]]] <- NA
  }

  dt$gender <- recode_codes(dt$gender, c("1" = "male", "2" = "female"),
                            default = "other")

  dt$commute <- recode_codes(
    dt$commute,
    c("1" = "city I live in", "2" = "closest city", "3" = "commute"),
    default = "don't work"
  )

  # 1 if income code is 1 or 2, 0 if higher, NA if income is missing
  dt$low_income <- as.numeric(dt$income <= 2)

  dt$voted <- recode_codes(dt$voted, c("1" = "yes", "2" = "no"))

  dt$Brexit <- recode_codes(dt$Brexit, c("1" = "leave", "2" = "remain"),
                            default = "undecided")

  # --- Treatment variable ----------------------------------------------------
  # 0 for the "Control" arm, 1 for every other non-missing value
  dt$treatment <- ifelse(dt$treatment == "Control", 0, 1)

  # --- Add sample information ------------------------------------------------
  dt$survey <- survey

  dt
}


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (A) Prolific Data ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

dt <- read.csv("./data/prolific_data.csv", comment.char = "#")

# Delete first two rows
# (presumably the extra header rows of the survey export - confirm)
dt <- dt[-c(1, 2), ]

# Drop "no consent"
# which() keeps only rows where the test is TRUE; a plain logical index would
# turn a missing consent value into an all-NA row
dt <- dt[which(dt$consent == 1), ]
dim(dt)

# Only keep fully completed data
dt <- dt[which(dt$Finished == 1), ]
dim(dt)

# Recode responses for two respondents as per their open-ended comments at end
# of survey (done on the raw codes, i.e. before the cleaning below)
dt$Brexit[dt$ID == "5c35fcaf8472fc00016edb4f"] <- 2
dt$citizen[dt$ID == "5c9bd1c1a586fd00014d5dad"] <- 4

# Clean key variables, add sample information and write output
Prolific <- clean_survey_vars(dt, survey = "Prolific")

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (B) Qualtrics Data ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

dt <- read.csv("./data/qualtrics_data.csv", comment.char = "#")

# Delete first two rows
# (presumably the extra header rows of the survey export - confirm)
dt <- dt[-c(1, 2), ]

# Drop "no consent"
dt <- dt[which(dt$consent == 1), ]
dim(dt)

# Only keep fully completed data
dt <- dt[which(dt$Finished == 1), ]
dim(dt)

# Target district (recoded for the Qualtrics sample only)
# NOTE(review): label 4 reads "East of Riding of Yorkshire"; the district is
# usually written "East Riding of Yorkshire". The label is left unchanged
# because later scripts may match on this exact string - confirm before fixing.
dt$district_target <- recode_codes(
  dt$district_target,
  c("1" = "Barnsley",
    "2" = "Cumbria",
    "3" = "Doncaster",
    "4" = "East of Riding of Yorkshire",
    "5" = "Leeds",
    "6" = "Rotherham",
    "7" = "Sheffield",
    "8" = "Wakefield")
)

# Clean key variables, add sample information and write output
Qualtrics <- clean_survey_vars(dt, survey = "Qualtrics")

# Drop dt dataframe
remove(dt)


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (C) Merge data sources ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Stack the two samples; rbind.fill() (plyr) fills columns that exist in only
# one of the two data frames with NA
full <- rbind.fill(Qualtrics, Prolific)

# Merge in coding of open-ended responses
# NOTE(review): this path ("../Data/") differs from the "./data/" folder used
# for the two survey files - confirm it resolves from the project directory.
qualdata <- read.csv2("../Data/Data_coding_readin.csv")
qualdata <- qualdata[, seq_len(18)]

# Number of distinct respondents in the coding file and in the survey data
length(unique(qualdata$ResponseId))
length(unique(full$ResponseId))

# Column names: respondent id, then the coded categories per policy area
colnames(qualdata) <- c(
  "ResponseId",
  paste0("coded_cc_",
         c("local", "subnat", "nat", "intl", "brexit", "energy")),
  paste0("coded_trade_",
         c("local", "subnat", "nat", "intl", "brexit", "costofliving")),
  paste0("coded_def_",
         c("local", "subnat", "nat", "intl", "brexit"))
)

# NOTE(review): merge() without all.x = TRUE keeps only respondents whose
# ResponseId appears in both tables - confirm that dropping the rest is intended.
full <- merge(full, qualdata, by = "ResponseId")

# Create sample dummies
table(full$survey, full$residence)

# 1/0 indicators; NA where a Qualtrics respondent has no residence recorded
full$coalctry <- with(full, as.numeric(survey == "Qualtrics" & residence == "England"))
full$scotland <- with(full, as.numeric(survey == "Qualtrics" & residence == "Scotland"))
full$wales    <- with(full, as.numeric(survey == "Qualtrics" & residence == "Wales"))
full$genpop   <- as.numeric(full$survey == "Prolific")

# Sample label; respondents that belong to none of the four groups stay NA
full$sample <- rep(NA_character_, nrow(full))
full$sample[which(full$genpop == 1)]   <- "General population"
full$sample[which(full$wales == 1)]    <- "Wales"
full$sample[which(full$scotland == 1)] <- "Scotland"
full$sample[which(full$coalctry == 1)] <- "Northern England"

full$sample <- factor(
  full$sample,
  levels = c("General population", "Northern England", "Scotland", "Wales"),
  ordered = FALSE
)


# Dummy variables for the DVs: one 1/0 indicator per value (4, 3, 2, 1) of the
# "*_1st" variable, named intl / nat / subnat / local respectively

# DV: climate
full[paste0("dv_climate_", c("intl", "nat", "subnat", "local"), "_dummy")] <-
  lapply(c(4, 3, 2, 1), function(lvl) ifelse(full$effect_cc_1st == lvl, 1, 0))

# DV: trade
full[paste0("dv_trade_", c("intl", "nat", "subnat", "local"), "_dummy")] <-
  lapply(c(4, 3, 2, 1), function(lvl) ifelse(full$effect_trade_1st == lvl, 1, 0))

# DV: defence
full[paste0("dv_def_", c("intl", "nat", "subnat", "local"), "_dummy")] <-
  lapply(c(4, 3, 2, 1), function(lvl) ifelse(full$effect_geopol_1st == lvl, 1, 0))

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# (D) Additional variable transformations ----
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Female dummy: 1 = "female", 0 = any other recorded gender, NA if missing
full$female <- ifelse(full$gender == "female", 1, 0)

# Age brackets, built from the numeric age code:
# codes below 3 = young, codes 3-4 = middle-aged, codes above 4 = old
full$young <- ifelse(full$age < 3, 1, 0)
full$middleage <- ifelse(full$age == 3 | full$age == 4, 1, 0)
full$old <- ifelse(full$age > 4, 1, 0)

# Education variables: codes 2-3 -> 1, code 4 -> 2, codes above 4 -> 3
# NOTE(review): education code 1 matches none of the conditions and therefore
# becomes NA - confirm that this is intended.
full$educ_cat <- ifelse(full$education == 2 | full$education == 3, 1,
                        ifelse(full$education == 4, 2,
                               ifelse(full$education > 4, 3, NA)))

# Political orientation: below 4 = left, above 4 = right (4 is neither)
full$pol_left <- ifelse(full$political_id < 4, 1, 0)
full$pol_right <- ifelse(full$political_id > 4, 1, 0)

# Urban dummy
full$urban <- ifelse(full$urbanrural == "urban", 1, 0)

# Fossil fuel industry in vicinity: codes 1 and 3 -> 1, code 2 -> 0, else NA
full$ff_vicinity <- NA
full$ff_vicinity[full$coalaware %in% c(1, 3)] <- 1
full$ff_vicinity[full$coalaware %in% 2] <- 0
table(full$ff_vicinity)

# Heavy industry in vicinity: codes 1 and 3 -> 1, code 2 -> 0, else NA
# (the code-3 line previously read `full$industryware`, a misspelt column name
# that evaluates to NULL, so code 3 was silently left as NA)
full$ind_vicinity <- NA
full$ind_vicinity[full$industryaware %in% c(1, 3)] <- 1
full$ind_vicinity[full$industryaware %in% 2] <- 0
table(full$ind_vicinity)


# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#                     END OF FILE
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

